Load packages
library(tidyverse)
library(readxl)
library(cowplot)
Import data
#Anna's Laptop
#Data <- read_xlsx("~/Desktop/PhysiologyDatabaseVersion5.xlsx", sheet = "T3", na = c("NA", ""))
# Mariana's desktop
#Data <- read_xlsx("/Users/marianaabarcazama/Desktop/Projects/ThermalPerformance/PhysiologyDatabaseVersion5.xlsx", sheet = "T3", na = c("NA", ""))
# Mariana's laptop
#Data <- read_xlsx("/Users/mar/Desktop/Projects/ThermalPerformance/PhysiologyDatabaseVersion5.xlsx", sheet = "T3", na = c("NA", ""))
Remove parasitoids and convert character to factor
unique(Data$status)
Error in unique(Data$status) : object 'Data' not found
Calculate development rate
When all larvae in a treatment die, their development time is “NA”, however their development rate should be “0”. The loop below calculates development rate (1/dt) and assigns 0 to all cases where both survival is 0 and development time is “NA”. There is one case in which development time is less than 1, it was rounded to 1.
fast <- filter(Ana, dt < 1)
Ana <- Ana %>%
mutate(dt = ifelse(dt < 1, ceiling(dt),dt))
sets <- unique(Ana$set)
rates <- tribble(~set, ~temp, ~dr)
rates2 <- tribble(~set, ~temp, ~dr)
output.table <- tibble()
for(i in seq_along(sets)) {
# make a table per set and determine whether it includes dt data
seti <- sets[i]
table <- filter(Ana, set == seti)
suma <- sum(table$dt, na.rm = T)
# if there is dt data
if (suma > 0){
clement <- filter(table, dt > 0) [["temp"]] # temperatures that allow for development
hotlim <- max(clement, na.rm = T) # max temp that permits development
coldlim <- min(clement, na.rm = T) # min temp that permits development
tempes <- unique(table$temp) # each temperature included in set i
# evaluate each temperature of a set
for(ii in 1:length(unique(tempes))){
tempii <- tempes[ii]
# if it is lower than coldlim, dr should be zero
if (table$temp[ii] < coldlim){
dr <- 0
output.row <- cbind(set = seti, temp = tempii, dr = dr)
# if it is higher than hotlim, dr should be zero
} else if (table$temp[ii] > hotlim) {
dr <- 0
output.row <- cbind(set = seti, temp = tempii, dr = dr)
# in all other cases, it should be 1/dt
} else{
dr <- 1/table$dt[ii]
output.row <- cbind(set = seti, temp = tempii, dr = dr)
}
output.table <- rbind(output.table, output.row)
}
# print stuff to make sure loop is working
#cat(seti, " ")
# if there is no development time in that set, dr should be zero
} else {
#cat("set: ", seti, "has no dt \n")
}
}
rates <- output.table
Ana_rates <- left_join(Ana, rates, by = c("set","temp"))
rm(fast, output.row, output.table, rates, rates2, table)
rm(clement, coldlim, dr, hotlim, i, ii, seti, sets, suma, tempes, tempii)
Summary statistics
There are 23 Lepidoptera families in the dataset.
family_sets <- Ana_rates %>%
select(set, family) %>%
distinct() %>%
group_by(family) %>%
summarise(sets = length(unique(set)))
Each family is represented by 3 to 405 sets.
ggplot(family_sets, aes(x = reorder(family, - sets), y = sets)) +
geom_col()+
geom_hline(yintercept = 50)+
geom_hline(yintercept = 25, linetype = 2)+
theme_cowplot()+
xlab("Family")+
coord_flip()

Figure 1. Number of sets per family, lines indicate 25 and 50 counts
species <- Ana_rates %>%
select(family, sp) %>%
distinct() %>%
group_by(family) %>%
summarise(sp_count = length(unique(sp))) %>%
arrange(sp_count)
ggplot(species, aes(x = reorder(family, - sp_count), y = sp_count)) +
geom_col()+
geom_hline(yintercept = 10, linetype = 2)+
geom_hline(yintercept = 5, linetype = 1)+
theme_cowplot()+
xlab("Family")+
ylab("Species")+
coord_flip()

Figure 2. Number of species per family, lines indicate 5 and 10 species counts.
lifestage_sets <- Ana_rates %>%
select(set, lifestage) %>%
distinct() %>%
group_by(lifestage) %>%
summarise(sets = length(unique(set))) %>%
arrange(sets)
ggplot(lifestage_sets, aes(x = reorder(lifestage, sets), y = sets)) +
geom_col()+
geom_hline(yintercept = 50)+
geom_hline(yintercept = 25, linetype = 2)+
theme_cowplot()+
xlab("Life stage")+
coord_flip()

Figure 3. Number of sets measuring each lifestage.
lifestage_sets_2 <- Ana_rates %>%
select(set, lifestage) %>%
distinct() %>%
group_by(lifestage) %>%
summarise(sets = length(unique(set))) %>%
arrange(sets) %>%
filter(sets > 4)
ggplot(lifestage_sets_2, aes(x = reorder(lifestage, sets), y = sets)) +
geom_col()+
geom_hline(yintercept = 50)+
geom_hline(yintercept = 25, linetype = 2)+
theme_cowplot()+
xlab("Life stage")+
coord_flip()

Figure 4. Number of sets per lifestage (only lifestages that have at least 5 sets). Lines at 50 and 25.
Sets with both development rate and survival
interval_sets <- Ana_rates %>%
group_by(set) %>%
mutate(dr_sum = sum(dr),
n_temps = length(unique(temp)),
validcount = sum(!is.na(dt)),
validcount_s = sum(!is.na(survival))) %>%
filter(n_temps > 3, dr_sum > 0, validcount > 3, validcount_s > 3)
interval_set_list <- unique(interval_sets$set)
interval <- Ana_rates %>% filter(set %in% interval_set_list)
main_stages <- Ana_rates %>%
filter(lifestage == "egg"|lifestage == "larva"|lifestage == "pupa")
main_stages_list <- unique(main_stages$lifestage)
inter <- interval %>% filter(lifestage %in% main_stages_list)
inter$lifestage <- factor(inter$lifestage)
There are 240 sets with both survival and development time data, from 57 families and 21 lifestages.
For the three main lifestages (egg, larva, pupa), there are 137 sets with both survival and development time data, from 51 species.
Upper Limit Exploration
At the upper thermal ranges contained in the dataset, we are interested in seeing whether we capture the ranges at which survival/development rate begin to decrease. The function below (is.rise) can be applied to development rate and to survival data. It returns a table with 6 columns: just.rise: TURE/FALSE. TRUE for curves missing the range at which performance decreases. ntemp: number of temperature treatments colds: number of temperature treatments below optimum hots: number of temperature treatments above optimum opts: number of temperatures that maximize performance (it’s common to observe multiple survival optima) response: dr/survival
is.rise <- function(table, response){
ta <- select(table, temp, response) # make a 2 column table
if(nrow(ta) == sum(is.na(ta[,2]))){ # this is to discard sets with only NA # values
print("No data")
}else{
# get:
# maximum performance
p.large <- max(ta[,2], na.rm = T)
# maximum temperature
t.max <- max(ta[,1], na.rm = T)
# coldest temperature that maximizes performance
t.large <- filter(ta, ta[,2] == p.large)[["temp"]][[1]]
# hotest temperature that maximizes performance (eg, survival can be 1 at
# multiple temperatures)
t.largemax <- max(t.large, na.rm = T)
# temperatures below the coldest optimum
colds <- filter(ta, ta[,1] < t.large)
# temperature above the hotest optimum
hots <- filter(ta, temp > t.largemax)
# temperatures that maximize performance
opts <- filter(ta, ta[,2] == p.large)
#output table
output <- tibble(just.rise = t.max == t.largemax, # TRUE for curves missing fall
ntemp = nrow(ta), # number of temperature treatments
colds = nrow(colds), # number of temps below optimum
hots = nrow(hots), # number of temps above optimum
opts = nrow(opts), # number of optimum temps
response = response) # dr or survival
output
}
}
Function to determine if minimum development time occurs at the highest temperature
# Is development time minimum at the highest temperature? ---------------------------------------------------------------
where.min.dt <- function(table){
ta <- select(table, temp, dt) # make a 2 column table
if(nrow(ta) == sum(is.na(ta[,2]))){ # this is to discard sets with only NA # values
print("No data")
}else{
# get:
# minimum development time
min.dt <- min(ta$dt, na.rm = T)
# maximum temperature
t.max <- max(ta$temp, na.rm = T)
min.dt.t <- tail(filter(ta, dt == min.dt)[["temp"]])
#output table
output <- tibble(is.min.at.hottest = t.max == min.dt.t) # TRUE when min at hotest
output
}
}
Calculate the number of sets where the maximum survival is not at the highest measured temp
This code applies the function above to each set, the result is a tibble of tables
qualitycheck <- inter %>%
group_by(set) %>%
nest() %>%
mutate(quality_dr = map2(data, "dr", is.rise),
quality_sur = map2(data, "survival", is.rise),
is.min.dt.at.max.t = map(data, where.min.dt))
Get only the quality metrics by set
Qmetrics <- select(qualitycheck, set, quality_dr, quality_sur, is.min.dt.at.max.t) %>%
unnest( cols = quality_dr, quality_sur, is.min.dt.at.max.t)
unnest() has a new interface. See ?unnest for details.
Try `df %>% unnest(c(quality_dr, quality_sur, is.min.dt.at.max.t))`, with `mutate()` if needed
Number of sets where the minimum development time is not at the highest measured temp: 109
There are 137 sets and most of them 114 report a fall in development time as temperature increases.
There are 148 sets in which the highest survival is not at the highest temperature
What are the most popular temperature treatments implemented?
ggplot(inter, aes(x = temp))+
geom_bar()+
theme_cowplot()+
facet_grid(~lifestage)

Figure 5. Frequency of temperature treatments in “inter” data set by life stage. The most popular temperature treatments are 15, 20, 25, 30, and 35.
Exploratory Graphs
These graphs represent some early data exploration, using the subset of the data that contains both survival and development time for the three main lifestages (egg, larva, pupa).
drhist <- ggplot(Qmetrics, aes(x = hots))+
geom_histogram() +
xlab("temps above dr optimum")+
theme_cowplot()
survivalhist <- ggplot(Qmetrics, aes(x = hots1))+
geom_histogram() +
xlab("temps above survival optimum")+
theme_cowplot()
plot_grid(drhist, survivalhist)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Figure 6. Histograms showing most sets in the “inter” subset include at least one temperature above the dr optimum and multple above the survival optimum
Scale development time
Function to extract longest development time by set and create a new column with scaled development time. s_dt = 1 - (dt/maxdt).
scale.dt <- function(table){
maxdt <- max(table$dt, na.rm = T)
table$s_dt <- 1 - (table$dt/maxdt)
table
}
Apply the function to each set and add s_dt column (scaled dt)
inter <- inter %>%
group_by(set) %>%
nest() %>%
mutate(s_dt = map(data, scale.dt)) %>%
unnest(cols = s_dt) %>%
select(-data)
Plot to check that s_dt was calculated properly
ggplot(filter(inter, set == 941|set == 1179| set == 3), aes(x = dt, y = s_dt, col = factor(set)))+
geom_point()+
scale_color_viridis_d()+
theme_cowplot()

dt: development time, s_dt, scaled development time (0 for longest)
egg <- inter[inter$lifestage == "egg", ]
egg$set <- factor(egg$set)
larva <- inter[inter$lifestage == "larva", ]
larva$set <- factor(larva$set)
pupa <- inter[inter$lifestage == "pupa", ]
pupa$set <- factor(pupa$set)
Graph the survival data vs. temperature within each lifestage, with each set as a line, faceted by family
ggplot(data = egg, aes(x = temp, y = survival, color = set, group = set)) +
geom_line() +
facet_wrap(~family) +
labs(x = "Temperature", y = "Survival (percentage)", title = "Egg Survival")

Figure 5. Egg survival at a range of temperatures, faceted by family.
ggplot(data = larva, aes(x = temp, y = survival, color = set, group = set)) +
geom_line() +
facet_wrap(~family) +
labs(x = "Temperature", y = "Survival (percentage)", title = "Larval Survival")

Figure 6. Larval survival at a range of temperatures, faceted by family.
ggplot(data = pupa, aes(x = temp, y = survival, color = set, group = set)) +
geom_line() +
facet_wrap(~family) +
labs(x = "Temperature", y = "Survival (percentage)", title = "Pupal Survival")

Figure 7. Pupal survival at a range of temperatures, faceted by family.
Graph the development time data vs. temperature within each lifestage, with each set as a line, faceted by family
ggplot(data = egg, aes(x = temp, y = dt, color = set, group = set)) +
geom_line() +
facet_wrap(~family) +
labs(x = "Temperature", y = "Development time (days)", title = "Egg Development Time")

Figure 8. Egg development time at a range of temperatures, faceted by family.
ggplot(data = larva, aes(x = temp, y = dt, color = set, group = set)) +
geom_line() +
facet_wrap(~family) +
labs(x = "Temperature", y = "Development time (days)", title = "Larval Development Time")

Figure 9. Larval development time at a range of temperatures, faceted by family.
ggplot(data = pupa, aes(x = temp, y = dt, color = set, group = set)) +
geom_line() +
facet_wrap(~family) +
labs(x = "Temperature", y = "Development time (days)", title = "Pupal Development Time")

Figure 10. Pupal development time at a range of temperatures, faceted by family.
Other exploratory graphs
Noctuids - an example of family-specific graphs
ggplot(data = filter(egg, family == "Noctuidae"), aes(x = temp, y = s_dt, color = set, group = set)) +
geom_line() +
geom_line(data = filter(egg, family == "Noctuidae"), mapping = aes(x = temp, y = survival/100, color = set, group = set), linetype = 2)+
facet_wrap(~sp) +
labs(x = "Temperature", y = "Scaled dt / Survival (dashed)", title = "Noctuid Egg Development and Survival")

Figure 11. Egg development rate and survival in the Noctuidae family, faceted by species.
ggplot(data = filter(larva, family == "Noctuidae"), aes(x = temp, y = s_dt, color = set, group = set)) +
geom_line() +
geom_line(data = filter(larva, family == "Noctuidae"), mapping = aes(x = temp, y = survival/100, color = set, group = set), linetype = 2)+
facet_wrap(~sp) +
labs(x = "Temperature", y = "Scaled dev time / Survival (dashed)", title = "Noctuid Larval Development and Survival")

Figure 12. Larval development rate and survival in the Noctuidae family, faceted by species.
ggplot(data = filter(pupa, family == "Noctuidae"), aes(x = temp, y = s_dt, color = set, group = set)) +
geom_line() +
geom_line(data = filter(pupa, family == "Noctuidae"), mapping = aes(x = temp, y = survival/100, color = set, group = set), linetype = 2)+
facet_wrap(~sp) +
labs(x = "Temperature", y = "Scaled dev time / Survival (dashed)", title = "Noctuid Pupal Development and Survival")

Figure 13. Pupal development rate and survival in the Noctuidae family, faceted by species.
Cummulative survival graphs
For studies that report survival of eggs, larvae and pupa, make graphs of cummulative survival:
# get ids of sets that report survival in all three life stages
egg <- filter(inter, lifestage == "egg", survival >= 0)
egg$set <- factor(egg$set)
larva <- filter(inter, lifestage == "larva", survival >= 1)
larva$set <- factor(larva$set)
pupa <- filter(inter, lifestage == "pupa", survival >= 1)
pupa$set <- factor(pupa$set)
egg_ids <- tibble(id = unique(egg$id))
larva_ids <- tibble(id = unique(larva$id))
pupa_ids <- tibble(id = unique(pupa$id))
fullstudies1 <- intersect(egg_ids,larva_ids)
fullstudies <- intersect(fullstudies1,pupa_ids)
# the new table "ontosurvival" contains only those sets
ontosurvival <- inter %>% filter(id %in% fullstudies$id)
ontosurvival$stagenum <- ifelse(ontosurvival$lifestage == "egg", 1, ifelse(ontosurvival$lifestage == "larva",2, 3))
Making a graph with cumulative survival
Here, I calculate cumulative/total suvival at each temp for each set and attempt to plot it.
head(ontosurvival)
combsurvival <- ontosurvival[, c("set", "id", "sp", "lifestage", "temp", "survival", "stagenum")]
---
title: "Compilation of Lepidoptera thermal performance curves"
output: html_notebook
---
# Load packages
```{r}
library(tidyverse)
library(readxl)
library(cowplot)
```
 
# Import data
```{r}
#Anna's Laptop
#Data <- read_xlsx("~/Desktop/PhysiologyDatabaseVersion5.xlsx", sheet = "T3", na = c("NA", ""))
```

```{r}
# Mariana's desktop
#Data <- read_xlsx("/Users/marianaabarcazama/Desktop/Projects/ThermalPerformance/PhysiologyDatabaseVersion5.xlsx", sheet = "T3", na = c("NA", "")) 

```

```{r}
# Mariana's laptop
#Data <- read_xlsx("/Users/mar/Desktop/Projects/ThermalPerformance/PhysiologyDatabaseVersion5.xlsx", sheet = "T3", na = c("NA", "")) 
```

Remove parasitoids and convert character to factor
```{r}
unique(Data$status)
Ana <- Data[Data$status != "parasitoid",]
rm(Data)

# convert character to factor
Ana <- Ana %>%
  mutate_if(is.character, factor)
```

## Calculate development rate
When all larvae in a treatment die, their development time is "NA", however
their development rate should be "0". The loop below calculates development rate (1/dt) and assigns 0 to all cases where both survival is 0 and development time is "NA".
There is one case in which development time is less than 1, it was rounded to 1.

```{r}
fast <- filter(Ana, dt < 1)
Ana <- Ana %>% 
  mutate(dt = ifelse(dt < 1, ceiling(dt),dt))
  

sets <- unique(Ana$set) 
rates <- tribble(~set, ~temp, ~dr)
rates2 <- tribble(~set, ~temp, ~dr)
output.table <- tibble()

for(i in seq_along(sets)) {
  
  # make a table per set and determine whether it includes dt data
  seti <- sets[i]
  table <- filter(Ana, set == seti)
  suma <- sum(table$dt, na.rm = T)
  
  # if there is dt data
  if (suma > 0){ 
    
    clement <- filter(table, dt > 0) [["temp"]] # temperatures that allow for development
    hotlim <- max(clement, na.rm = T) # max temp that permits development
    coldlim <- min(clement, na.rm = T) # min temp that permits development
    tempes <- unique(table$temp) # each temperature included in set i
    
    # evaluate each temperature of a set
    for(ii in 1:length(unique(tempes))){
      
      tempii <- tempes[ii]
      
      # if it is lower than coldlim, dr should be zero
      if (table$temp[ii] < coldlim){
        dr <- 0
      output.row <- cbind(set = seti, temp = tempii, dr = dr)
      
      # if it is higher than hotlim, dr should be zero  
      } else if (table$temp[ii] > hotlim) {
        
        dr <- 0
      output.row <- cbind(set = seti, temp = tempii, dr = dr)
      
      # in all other cases, it should be 1/dt
      } else{
        dr <- 1/table$dt[ii]
        output.row <- cbind(set = seti, temp = tempii, dr = dr)
      }
      
      output.table <- rbind(output.table, output.row)
    }
    
    # print stuff to make sure loop is working
    #cat(seti, " ")
    
    
  # if there is no development time in that set, dr should be zero
    } else {
    #cat("set: ", seti, "has no dt \n")
  }
  
}

rates <- output.table

Ana_rates <- left_join(Ana, rates, by = c("set","temp"))

rm(fast, output.row, output.table, rates, rates2, table)
rm(clement, coldlim, dr, hotlim, i, ii, seti, sets, suma, tempes, tempii)

```

# Summary statistics
There are `r length(unique(Ana_rates$family))` Lepidoptera families in the dataset.
```{r}
family_sets <-  Ana_rates %>%
  select(set, family) %>% 
  distinct() %>% 
  group_by(family) %>% 
  summarise(sets = length(unique(set)))
```

Each family is represented by `r min(family_sets$sets)` to `r max(family_sets$sets)` sets. 

```{r}
ggplot(family_sets, aes(x = reorder(family, - sets), y = sets)) +
  geom_col()+
  geom_hline(yintercept = 50)+
  geom_hline(yintercept = 25, linetype = 2)+
  theme_cowplot()+
  xlab("Family")+
  coord_flip()
```

Figure 1. Number of sets per family, lines indicate 25 and 50 counts
```{r}
species <-  Ana_rates %>%
  select(family, sp) %>% 
  distinct() %>% 
  group_by(family) %>% 
  summarise(sp_count = length(unique(sp))) %>% 
  arrange(sp_count)

ggplot(species, aes(x = reorder(family, - sp_count), y = sp_count)) +
  geom_col()+
  geom_hline(yintercept = 10, linetype = 2)+
  geom_hline(yintercept = 5, linetype = 1)+
  theme_cowplot()+
  xlab("Family")+
  ylab("Species")+
  coord_flip()
```

Figure 2. Number of species per family, lines indicate 5 and 10 species counts.


```{r}
lifestage_sets <-  Ana_rates %>%
  select(set, lifestage) %>% 
  distinct() %>% 
  group_by(lifestage) %>% 
  summarise(sets = length(unique(set))) %>% 
  arrange(sets)

ggplot(lifestage_sets, aes(x = reorder(lifestage,  sets), y = sets)) +
  geom_col()+
  geom_hline(yintercept = 50)+
  geom_hline(yintercept = 25, linetype = 2)+
  theme_cowplot()+
  xlab("Life stage")+
 
  coord_flip()
```


Figure 3. Number of sets measuring each lifestage.

```{r}
lifestage_sets_2 <-  Ana_rates %>%
  select(set, lifestage) %>% 
  distinct() %>% 
  group_by(lifestage) %>% 
  summarise(sets = length(unique(set))) %>% 
  arrange(sets) %>% 
  filter(sets > 4)

ggplot(lifestage_sets_2, aes(x = reorder(lifestage,  sets), y = sets)) +
  geom_col()+
  geom_hline(yintercept = 50)+
  geom_hline(yintercept = 25, linetype = 2)+
  theme_cowplot()+
  xlab("Life stage")+
  
  coord_flip()
```
Figure 4. Number of sets per lifestage (only lifestages that have at least 5 sets). Lines at 50 and 25.

# Sets with both development rate and survival

```{r}
interval_sets <- Ana_rates %>% 
  group_by(set) %>% 
   
  mutate(dr_sum = sum(dr), 
         n_temps = length(unique(temp)), 
         validcount = sum(!is.na(dt)), 
         validcount_s = sum(!is.na(survival))) %>% 
  filter(n_temps > 3, dr_sum > 0, validcount > 3, validcount_s > 3)

interval_set_list <- unique(interval_sets$set) 

interval <- Ana_rates %>% filter(set %in% interval_set_list)

main_stages <- Ana_rates %>% 
  filter(lifestage == "egg"|lifestage == "larva"|lifestage == "pupa")
main_stages_list <- unique(main_stages$lifestage)
inter <- interval %>% filter(lifestage %in% main_stages_list) 
inter$lifestage <- factor(inter$lifestage)

```
There are `r length(unique(interval$set))` sets with both survival and
development time data, from `r length(unique(interval$sp))` families and
`r length(unique(interval$lifestage))` lifestages. 

For the three main lifestages (egg, larva, pupa), there are `r length(unique(inter$set))` sets with both survival and
development time data, from `r length(unique(inter$sp))` species.

# Upper Limit Exploration 
At the upper thermal ranges contained in the dataset, we are interested in seeing whether we capture the ranges at which survival/development rate begin to decrease. 
The function below (is.rise) can be applied to development rate and to survival data. It returns a table with 6 columns:
just.rise: TURE/FALSE. TRUE for curves missing the range at which performance decreases.
ntemp: number of temperature treatments
colds: number of temperature treatments below optimum
hots: number of temperature treatments above optimum
opts: number of temperatures that maximize performance (it's common to observe multiple survival optima)
response: dr/survival

```{r}
is.rise <- function(table, response){
  ta <- select(table, temp, response) # make a 2 column table
  
  if(nrow(ta) == sum(is.na(ta[,2]))){ # this is to discard sets with only NA                                           # values
    print("No data")
  }else{
    
    # get:
    # maximum performance
    p.large <- max(ta[,2], na.rm = T) 
    # maximum temperature
    t.max <- max(ta[,1], na.rm = T)
    # coldest temperature that maximizes performance
    t.large <- filter(ta, ta[,2] == p.large)[["temp"]][[1]]
    # hotest temperature that maximizes performance (eg, survival can be 1 at 
    # multiple temperatures)
    t.largemax <- max(t.large, na.rm = T)
    # temperatures below the coldest optimum
    colds <- filter(ta, ta[,1] < t.large)
    # temperature above the hotest optimum
    hots <- filter(ta, temp > t.largemax)
    # temperatures that maximize performance
    opts <- filter(ta, ta[,2] == p.large)
    
    #output table
    output <- tibble(just.rise =  t.max == t.largemax, # TRUE for curves missing fall
                     ntemp = nrow(ta), # number of temperature treatments
                     colds = nrow(colds), # number of temps below optimum
                     hots = nrow(hots), # number of temps above optimum
                     opts = nrow(opts), # number of optimum temps
                     response = response) # dr or survival
    output
  }
}
```

 Function to determine if minimum development time occurs at the highest temperature
```{r}
# Is development time minimum at the highest temperature? ---------------------------------------------------------------
where.min.dt <- function(table){
  ta <- select(table, temp, dt) # make a 2 column table
  
  if(nrow(ta) == sum(is.na(ta[,2]))){ # this is to discard sets with only NA                                           # values
    print("No data")
  }else{
    
    # get:
    # minimum development time
    min.dt <- min(ta$dt, na.rm = T) 
    # maximum temperature
    t.max <- max(ta$temp, na.rm = T)
    
    min.dt.t <- tail(filter(ta, dt == min.dt)[["temp"]])
    
    #output table
    output <- tibble(is.min.at.hottest =  t.max == min.dt.t) # TRUE when min at hotest
    output
  }
}
```

## Calculate the number of sets where the maximum survival is not at the highest measured temp
This code applies the function above to each set, the result is a tibble of tables
```{r}
qualitycheck <- inter %>% 
  group_by(set) %>% 
  nest() %>% 
  mutate(quality_dr = map2(data, "dr", is.rise), 
         quality_sur = map2(data, "survival", is.rise), 
         is.min.dt.at.max.t = map(data, where.min.dt))
```

Get only the quality metrics by set
```{r}
Qmetrics <- select(qualitycheck, set, quality_dr, quality_sur, is.min.dt.at.max.t) %>% 
  unnest( cols = quality_dr, quality_sur, is.min.dt.at.max.t)
```


## Number of sets where the minimum development time is not at the highest measured temp: `r sum(Qmetrics$is.min.at.hottest == FALSE)`
There are `r length(unique(Qmetrics$set))` sets and most of them  `r sum(Qmetrics$just.rise == FALSE)` report a fall in development time as temperature increases.

There are `r sum(Qmetrics$just.rise1 == FALSE)` sets in which the highest survival is not at the highest temperature

# What are the most popular temperature treatments implemented?

```{r}
ggplot(inter, aes(x = temp))+
  geom_bar()+
  theme_cowplot()+
  facet_grid(~lifestage)
```
Figure 5. Frequency of temperature treatments in "inter" data set by life stage. 
The most popular temperature treatments are 15, 20, 25, 30, and 35.

# Exploratory Graphs
These graphs represent some early data exploration, using the subset of the data that contains both survival and development time for the three main lifestages (egg, larva, pupa). 
```{r}
drhist <- ggplot(Qmetrics, aes(x = hots))+
  geom_histogram() +
  xlab("temps above dr optimum")+
  theme_cowplot()
survivalhist <- ggplot(Qmetrics, aes(x = hots1))+
  geom_histogram() +
  xlab("temps above survival optimum")+
  theme_cowplot()
plot_grid(drhist, survivalhist)

```
Figure 6. Histograms showing most sets in the "inter" subset include at least one temperature above the dr optimum and multple above the survival optimum

# Scale development time
Function to extract longest development time by set and create a new column with scaled development time. 
s_dt = 1 - (dt/maxdt).
```{r}
scale.dt <- function(table){
  maxdt <- max(table$dt, na.rm = T)
  table$s_dt <- 1 - (table$dt/maxdt)
  table
}
```


Apply the function to each set and add s_dt column (scaled dt)
```{r}
inter <- inter %>% 
  group_by(set) %>% 
  nest() %>% 
  mutate(s_dt = map(data, scale.dt)) %>% 
  unnest(cols = s_dt) %>% 
  select(-data)
```

Plot to check that s_dt was calculated properly
```{r}
ggplot(filter(inter, set == 941|set == 1179| set == 3), aes(x = dt, y = s_dt, col = factor(set)))+
  geom_point()+
  scale_color_viridis_d()+
  theme_cowplot()
```
dt: development time, s_dt, scaled development time (0 for longest)

```{r}
egg <- inter[inter$lifestage == "egg", ]
egg$set <- factor(egg$set)
larva <- inter[inter$lifestage == "larva", ]
larva$set <- factor(larva$set)
pupa <- inter[inter$lifestage == "pupa", ]
pupa$set <- factor(pupa$set)
```

## Graph the survival data vs. temperature within each lifestage, with each set as a line, faceted by family 

```{r}
ggplot(data = egg, aes(x = temp, y = survival, color = set, group = set)) + 
  geom_line() +
  facet_wrap(~family) + 
  labs(x = "Temperature", y = "Survival (percentage)", title = "Egg Survival")
```
Figure 5. Egg survival at a range of temperatures, faceted by family. 


```{r}
ggplot(data = larva, aes(x = temp, y = survival, color = set, group = set)) + 
  geom_line() +
  facet_wrap(~family) + 
  labs(x = "Temperature", y = "Survival (percentage)", title = "Larval Survival")
```
Figure 6. Larval survival at a range of temperatures, faceted by family. 


```{r}
ggplot(data = pupa, aes(x = temp, y = survival, color = set, group = set)) + 
  geom_line() +
  facet_wrap(~family) + 
  labs(x = "Temperature", y = "Survival (percentage)", title = "Pupal Survival")
```
Figure 7. Pupal survival at a range of temperatures, faceted by family. 


## Graph the development time data vs. temperature within each lifestage, with each set as a line, faceted by family

```{r}
ggplot(data = egg, aes(x = temp, y = dt, color = set, group = set)) + 
  geom_line() +
  facet_wrap(~family) + 
  labs(x = "Temperature", y = "Development time (days)", title = "Egg Development Time")
```
Figure 8. Egg development time at a range of temperatures, faceted by family. 


```{r}
ggplot(data = larva, aes(x = temp, y = dt, color = set, group = set)) + 
  geom_line() +
  facet_wrap(~family) + 
  labs(x = "Temperature", y = "Development time (days)", title = "Larval Development Time")
```
Figure 9. Larval development time at a range of temperatures, faceted by family. 


```{r}
ggplot(data = pupa, aes(x = temp, y = dt, color = set, group = set)) + 
  geom_line() +
  facet_wrap(~family) + 
  labs(x = "Temperature", y = "Development time (days)", title = "Pupal Development Time")
```
Figure 10. Pupal development time at a range of temperatures, faceted by family. 

## Other exploratory graphs

### Noctuids - an example of family-specific graphs

```{r}
ggplot(data = filter(egg, family == "Noctuidae"), aes(x = temp, y = s_dt, color = set, group = set)) + 
  geom_line() +
  geom_line(data = filter(egg, family == "Noctuidae"), mapping = aes(x = temp, y = survival/100, color = set, group = set), linetype = 2)+
  facet_wrap(~sp) + 
  labs(x = "Temperature", y = "Scaled dt / Survival (dashed)", title = "Noctuid Egg Development and Survival")
```
Figure 11. Egg development rate and survival in the Noctuidae family, faceted by species. 


```{r}
ggplot(data = filter(larva, family == "Noctuidae"), aes(x = temp, y = s_dt, color = set, group = set)) + 
  geom_line() +
  geom_line(data = filter(larva, family == "Noctuidae"), mapping = aes(x = temp, y = survival/100, color = set, group = set), linetype = 2)+
  facet_wrap(~sp) + 
  labs(x = "Temperature", y = "Scaled dev time / Survival (dashed)", title = "Noctuid Larval Development and Survival")
```
Figure 12. Larval development rate and survival in the Noctuidae family, faceted by species. 


```{r}
ggplot(data = filter(pupa, family == "Noctuidae"), aes(x = temp, y = s_dt, color = set, group = set)) + 
  geom_line() +
  geom_line(data = filter(pupa, family == "Noctuidae"), mapping = aes(x = temp, y = survival/100, color = set, group = set), linetype = 2)+
  facet_wrap(~sp) + 
  labs(x = "Temperature", y = "Scaled dev time / Survival (dashed)", title = "Noctuid Pupal Development and Survival")
```
Figure 13. Pupal development rate and survival in the Noctuidae family, faceted by species. 

# Cummulative survival graphs
For studies that report survival of eggs, larvae and pupa, make graphs of cummulative survival:
 
```{r}
# get ids of sets that report survival in all three life stages
egg <- filter(inter, lifestage == "egg", survival >= 0)
egg$set <- factor(egg$set)
larva <- filter(inter, lifestage == "larva",  survival >= 1)
larva$set <- factor(larva$set)
pupa <- filter(inter, lifestage == "pupa",  survival >= 1)
pupa$set <- factor(pupa$set)

egg_ids <- tibble(id = unique(egg$id))
larva_ids <- tibble(id = unique(larva$id))
pupa_ids <- tibble(id = unique(pupa$id))

fullstudies1 <- intersect(egg_ids,larva_ids)
fullstudies <- intersect(fullstudies1,pupa_ids)
# the new table "ontosurvival" contains only those sets
ontosurvival <- inter %>% filter(id %in% fullstudies$id)
ontosurvival$stagenum <- ifelse(ontosurvival$lifestage == "egg", 1, ifelse(ontosurvival$lifestage == "larva",2, 3))
```


# Making a graph with cumulative survival
Here, I calculate cumulative/total suvival at each temp for each set and attempt to plot it. 
```{r}
head(ontosurvival)

combsurvival <- ontosurvival[, c("set", "id", "sp", "lifestage", "temp", "survival", "stagenum")]
```

```{r}


```

